# Lab 05: Data Visualization
# ---
# author: "Jack"
# ---
# Library loading
library(ggplot2)
library(gapminder)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Peek at the first six rows of the built-in 'cars' data
head(cars, n = 6)
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
# Plot stopping distance (ft) against speed (mph) for the 'cars' dataset
# with ggplot2.
# Option 1 (kept for reference, not run): points only.
# NOTE: title() is a base-graphics function, not a ggplot2 component;
# the ggplot2 way to set a title is ggtitle() or labs(title = ...).
# ggplot(data = cars) +
#   aes(x = speed, y = dist) +
#   geom_point() +
#   title(main = "Stopping Distance of Old Cars") +
#   xlab("Speed (MPH)") +
#   ylab("Stopping Distance (ft)")
# Option 2: points plus a fitted straight line (lm) with its confidence band
ggplot(cars, aes(x = speed, y = dist)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  ggtitle("Stopping Distance of Old Cars") +
  xlab("Speed (MPH)") +
  ylab("Stopping Distance (ft)")

# The same figure drawn with base graphics: filled points (pch = 16) and
# the least-squares line of dist on speed overlaid in blue.
plot(dist ~ speed,
     data = cars,
     pch = 16,
     main = "Stopping Distance of Old Cars\n[base graphics]",
     xlab = "Speed (MPH)",
     ylab = "Stopping Distance (ft)")
abline(reg = lm(dist ~ speed, data = cars),
       col = "blue",
       lwd = 2)

# Read the tab-delimited gene expression table from the course website
url <- "https://bioboot.github.io/bimm143_S20/class-material/up_down_expression.txt"
genes <- read.delim(file = url)
# First six rows
head(genes, n = 6)
## Gene Condition1 Condition2 State
## 1 A4GNT -3.6808610 -3.4401355 unchanging
## 2 AAAS 4.5479580 4.3864126 unchanging
## 3 AASDH 3.7190695 3.4787276 unchanging
## 4 AATF 5.0784720 5.0151916 unchanging
## 5 AATK 0.4711421 0.5598642 unchanging
## 6 AB015752.4 -3.6808610 -3.5921390 unchanging
# Number of genes (rows)
dim(genes)[1]
## [1] 5196
# Column names and how many columns there are
names(genes)
## [1] "Gene" "Condition1" "Condition2" "State"
length(genes)
## [1] 4
# How many genes fall into each State category
table(genes[["State"]])
##
## down unchanging up
## 72 4997 127
# Percentage of all genes that are up-regulated, to two decimal places
round(100 * table(genes[["State"]])["up"] / nrow(genes), 2)
## up
## 2.44
# Scatter plot of expression under Condition1 vs Condition2, one point per
# gene, coloured by its State.
p <- ggplot(data = genes) +
  aes(x = Condition1, y = Condition2, col = State) +
  geom_point()
p

# Replace the default palette. The colours are given as a NAMED vector so
# each one is tied to a specific State value; an unnamed vector would be
# matched by position and silently depend on the (alphabetical) level order.
p <- p +
  scale_color_manual(values = c(down = "red",
                                unchanging = "gray",
                                up = "blue"))
p

# Add a title and descriptive axis labels
p <- p +
  labs(title = "Gene Expression Changes Upon Drug Treatment",
       x = "Control (no drug)",
       y = "Drug Treatment")
p

# gapminder dataset: keep only the 2007 rows (reused by the plots below)
gapminder_2007 <- filter(gapminder, year == 2007)
# Explore the full data: one violin of life expectancy per survey year with
# the median marked, overlaid with jittered points coloured by continent.
p <- ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_violin(aes(group = year), draw_quantiles = 0.5) +
  geom_jitter(aes(col = continent), width = 0.3, alpha = 0.4)
p

# Interactive plotly version of the same figure
ggplotly(p)
# 2007 bubble chart: GDP per capita vs life expectancy, colour = continent,
# point size = population; semi-transparent so overlapping points stay visible.
p <- ggplot(
  gapminder_2007,
  aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point(alpha = 0.4)
p

# Counter-example: population mapped to colour instead, which gives a
# continuous colour scale that is hard to read.
q <- ggplot(gapminder_2007, aes(x = gdpPercap, y = lifeExp, colour = pop)) +
  geom_point()
q

# Point size mapped to population, using the default size scale
p <- ggplot(gapminder_2007, aes(x = gdpPercap, y = lifeExp, size = pop)) +
  geom_point(alpha = 0.4)
p

# Switch to an area-based size scale and redraw
p <- p + scale_size_area()
p

# Exploring the 1957 gapminder data: same bubble chart as for 2007, with a
# larger maximum point size.
gapminder_1957 <- filter(gapminder, year == 1957)
p <- ggplot(
  gapminder_1957,
  aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point(alpha = 0.7) +
  scale_size_area(max_size = 15)
p

# 1957 and 2007 side by side, one facet panel per year
gapminder_1957_2007 <- filter(gapminder, year %in% c(1957, 2007))
p <- ggplot(
  gapminder_1957_2007,
  aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point(alpha = 0.7) +
  scale_size_area(max_size = 15) +
  facet_wrap(vars(year))
p

# Bar chart: the five most populous countries in 2007.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties by
# default. The explicit arrange() guarantees the printed table is ordered
# from largest to smallest population.
gapminder_2007_top5 <- gapminder %>%
  filter(year == 2007) %>%
  slice_max(pop, n = 5) %>%
  arrange(desc(pop))
gapminder_2007_top5
## # A tibble: 5 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 China Asia 2007 73.0 1318683096 4959.
## 2 India Asia 2007 64.7 1110396331 2452.
## 3 United States Americas 2007 78.2 301139947 42952.
## 4 Indonesia Asia 2007 70.6 223547000 3541.
## 5 Brazil Americas 2007 72.4 190010647 9066.
# One bar per country, bar height = population.
# NOTE(review): `country` is a factor, so the bars presumably follow its
# level order rather than the row order above -- confirm if a sorted bar
# order is wanted.
ggplot(data = gapminder_2007_top5) +
  aes(x = country, y = pop) +
  geom_col()
